From 193af2f5c726788dd836c42561550029eadd7ce6 Mon Sep 17 00:00:00 2001 From: "kaf24@firebug.cl.cam.ac.uk" Date: Tue, 14 Feb 2006 16:23:43 +0100 Subject: [PATCH] Upgrade arch/x86/cpu/* files to their equivalents in linux-2.6.16-rc2/arch/i386/kernel/cpu/*. Also include kernel taint tracking and include that information, and Xen release info, in our crash dumps. Signed-off-by: Keir Fraser --- xen/arch/x86/Makefile | 3 +- xen/arch/x86/apic.c | 6 +- xen/arch/x86/cpu/amd.c | 41 +++- xen/arch/x86/cpu/common.c | 119 ++++++----- xen/arch/x86/cpu/intel.c | 30 +-- xen/arch/x86/cpu/mcheck/k7.c | 95 +++++++++ xen/arch/x86/cpu/mcheck/mce.c | 73 +++++++ xen/arch/x86/cpu/mcheck/mce.h | 14 ++ xen/arch/x86/cpu/mcheck/non-fatal.c | 86 ++++++++ xen/arch/x86/cpu/mcheck/p4.c | 270 +++++++++++++++++++++++++ xen/arch/x86/cpu/mcheck/p5.c | 52 +++++ xen/arch/x86/cpu/mcheck/p6.c | 118 +++++++++++ xen/arch/x86/cpu/mcheck/winchip.c | 37 ++++ xen/arch/x86/{ => cpu}/mtrr/amd.c | 0 xen/arch/x86/{ => cpu}/mtrr/centaur.c | 5 +- xen/arch/x86/{ => cpu}/mtrr/cyrix.c | 8 +- xen/arch/x86/{ => cpu}/mtrr/generic.c | 65 +++--- xen/arch/x86/{ => cpu}/mtrr/main.c | 166 ++++++++------- xen/arch/x86/{ => cpu}/mtrr/mtrr.h | 6 +- xen/arch/x86/{ => cpu}/mtrr/state.c | 4 +- xen/arch/x86/dom0_ops.c | 8 +- xen/arch/x86/hvm/svm/svm.c | 4 +- xen/arch/x86/hvm/vmx/vmx.c | 15 +- xen/arch/x86/i8259.c | 4 +- xen/arch/x86/setup.c | 2 +- xen/arch/x86/smp.c | 6 +- xen/arch/x86/smpboot.c | 4 +- xen/arch/x86/traps.c | 12 +- xen/arch/x86/x86_32/traps.c | 9 +- xen/arch/x86/x86_64/mm.c | 6 +- xen/arch/x86/x86_64/traps.c | 5 + xen/common/kernel.c | 33 +++ xen/include/asm-x86/bitops.h | 52 +++++ xen/include/asm-x86/config.h | 1 + xen/include/asm-x86/cpufeature.h | 7 +- xen/include/asm-x86/msr.h | 14 +- xen/include/asm-x86/processor.h | 55 +++-- xen/include/asm-x86/x86_32/asm_defns.h | 13 +- xen/include/asm-x86/x86_64/asm_defns.h | 14 +- xen/include/xen/bitops.h | 49 +++++ xen/include/xen/init.h | 2 + xen/include/xen/lib.h | 8 + 42 
files changed, 1238 insertions(+), 283 deletions(-) create mode 100644 xen/arch/x86/cpu/mcheck/k7.c create mode 100644 xen/arch/x86/cpu/mcheck/mce.c create mode 100644 xen/arch/x86/cpu/mcheck/mce.h create mode 100644 xen/arch/x86/cpu/mcheck/non-fatal.c create mode 100644 xen/arch/x86/cpu/mcheck/p4.c create mode 100644 xen/arch/x86/cpu/mcheck/p5.c create mode 100644 xen/arch/x86/cpu/mcheck/p6.c create mode 100644 xen/arch/x86/cpu/mcheck/winchip.c rename xen/arch/x86/{ => cpu}/mtrr/amd.c (100%) rename xen/arch/x86/{ => cpu}/mtrr/centaur.c (98%) rename xen/arch/x86/{ => cpu}/mtrr/cyrix.c (97%) rename xen/arch/x86/{ => cpu}/mtrr/generic.c (85%) rename xen/arch/x86/{ => cpu}/mtrr/main.c (86%) rename xen/arch/x86/{ => cpu}/mtrr/mtrr.h (95%) rename xen/arch/x86/{ => cpu}/mtrr/state.c (93%) diff --git a/xen/arch/x86/Makefile b/xen/arch/x86/Makefile index 91aeddfa05..14af01001c 100644 --- a/xen/arch/x86/Makefile +++ b/xen/arch/x86/Makefile @@ -4,9 +4,10 @@ include $(BASEDIR)/Rules.mk OBJS += $(patsubst %.S,%.o,$(wildcard $(TARGET_SUBARCH)/*.S)) OBJS += $(patsubst %.c,%.o,$(wildcard $(TARGET_SUBARCH)/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard acpi/*.c)) -OBJS += $(patsubst %.c,%.o,$(wildcard mtrr/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard genapic/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard cpu/*.c)) +OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mcheck/*.c)) +OBJS += $(patsubst %.c,%.o,$(wildcard cpu/mtrr/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard hvm/*.c)) OBJS += $(patsubst %.c,%.o,$(wildcard hvm/vmx/*.c)) OBJS += $(patsubst %.S,%.o,$(wildcard hvm/vmx/$(TARGET_SUBARCH)/*.S)) diff --git a/xen/arch/x86/apic.c b/xen/arch/x86/apic.c index 7eb6000b37..b4af17b3ae 100644 --- a/xen/arch/x86/apic.c +++ b/xen/arch/x86/apic.c @@ -927,7 +927,7 @@ int reprogram_timer(s_time_t timeout) return 1; } -void smp_apic_timer_interrupt(struct cpu_user_regs * regs) +fastcall void smp_apic_timer_interrupt(struct cpu_user_regs * regs) { ack_APIC_irq(); perfc_incrc(apic_timer); @@ -937,7 +937,7 @@ 
void smp_apic_timer_interrupt(struct cpu_user_regs * regs) /* * This interrupt should _never_ happen with our APIC/SMP architecture */ -asmlinkage void smp_spurious_interrupt(struct cpu_user_regs *regs) +fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs) { unsigned long v; @@ -959,7 +959,7 @@ asmlinkage void smp_spurious_interrupt(struct cpu_user_regs *regs) * This interrupt should never happen with our APIC/SMP architecture */ -asmlinkage void smp_error_interrupt(struct cpu_user_regs *regs) +fastcall void smp_error_interrupt(struct cpu_user_regs *regs) { unsigned long v, v1; diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c index 6dcf15c527..fe4b078aa8 100644 --- a/xen/arch/x86/cpu/amd.c +++ b/xen/arch/x86/cpu/amd.c @@ -48,6 +48,22 @@ static void __init init_amd(struct cpuinfo_x86 *c) int mbytes = num_physpages >> (20-PAGE_SHIFT); int r; +#ifdef CONFIG_SMP + unsigned long long value; + + /* Disable TLB flush filter by setting HWCR.FFDIS on K8 + * bit 6 of msr C001_0015 + * + * Errata 63 for SH-B3 steppings + * Errata 122 for all steppings (F+ have it disabled by default) + */ + if (c->x86 == 15) { + rdmsrl(MSR_K7_HWCR, value); + value |= 1 << 6; + wrmsrl(MSR_K7_HWCR, value); + } +#endif + /* * FIXME: We should handle the K5 here. 
Set up the write * range and also turn on MSR 83 bits 4 and 31 (write alloc, @@ -165,8 +181,13 @@ static void __init init_amd(struct cpuinfo_x86 *c) set_bit(X86_FEATURE_K6_MTRR, c->x86_capability); break; } - break; + if (c->x86_model == 10) { + /* AMD Geode LX is model 10 */ + /* placeholder for any needed mods */ + break; + } + break; case 6: /* An Athlon/Duron */ /* Bit 15 of Athlon specific MSR 15, needs to be 0 @@ -225,9 +246,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) display_cacheinfo(c); if (cpuid_eax(0x80000000) >= 0x80000008) { - c->x86_num_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; - if (c->x86_num_cores & (c->x86_num_cores - 1)) - c->x86_num_cores = 1; + c->x86_max_cores = (cpuid_ecx(0x80000008) & 0xff) + 1; + if (c->x86_max_cores & (c->x86_max_cores - 1)) + c->x86_max_cores = 1; + } + + if (cpuid_eax(0x80000000) >= 0x80000007) { + c->x86_power = cpuid_edx(0x80000007); + if (c->x86_power & (1<<8)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); } #ifdef CONFIG_X86_HT @@ -236,15 +263,15 @@ static void __init init_amd(struct cpuinfo_x86 *c) * distingush the cores. Assumes number of cores is a power * of two. 
*/ - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { int cpu = smp_processor_id(); unsigned bits = 0; - while ((1 << bits) < c->x86_num_cores) + while ((1 << bits) < c->x86_max_cores) bits++; cpu_core_id[cpu] = phys_proc_id[cpu] & ((1<>= bits; printk(KERN_INFO "CPU %d(%d) -> Core %d\n", - cpu, c->x86_num_cores, cpu_core_id[cpu]); + cpu, c->x86_max_cores, cpu_core_id[cpu]); } #endif diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c index 49661af7d8..beae3f5925 100644 --- a/xen/arch/x86/cpu/common.c +++ b/xen/arch/x86/cpu/common.c @@ -17,14 +17,12 @@ #define tsc_disable 0 #define disable_pse 0 -static int cachesize_override __initdata = -1; -static int disable_x86_fxsr __initdata = 0; -static int disable_x86_serial_nr __initdata = 1; +static int cachesize_override __devinitdata = -1; +static int disable_x86_fxsr __devinitdata = 0; +static int disable_x86_serial_nr __devinitdata = 0; struct cpu_dev * cpu_devs[X86_VENDOR_NUM] = {}; -extern void mcheck_init(struct cpuinfo_x86 *c); - static void default_init(struct cpuinfo_x86 * c) { /* Not much we can do here... */ @@ -43,7 +41,9 @@ static struct cpu_dev default_cpu = { }; static struct cpu_dev * this_cpu = &default_cpu; -int __init get_model_name(struct cpuinfo_x86 *c) +integer_param("cachesize", cachesize_override); + +int __devinit get_model_name(struct cpuinfo_x86 *c) { unsigned int *v; char *p, *q; @@ -73,7 +73,7 @@ int __init get_model_name(struct cpuinfo_x86 *c) } -void __init display_cacheinfo(struct cpuinfo_x86 *c) +void __devinit display_cacheinfo(struct cpuinfo_x86 *c) { unsigned int n, dummy, ecx, edx, l2size; @@ -114,7 +114,7 @@ void __init display_cacheinfo(struct cpuinfo_x86 *c) /* in particular, if CPUID levels 0x80000002..4 are supported, this isn't used */ /* Look up CPU names by table lookup. 
*/ -static char __init *table_lookup_model(struct cpuinfo_x86 *c) +static char __devinit *table_lookup_model(struct cpuinfo_x86 *c) { struct cpu_model_info *info; @@ -135,7 +135,7 @@ static char __init *table_lookup_model(struct cpuinfo_x86 *c) } -void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) +static void __devinit get_cpu_vendor(struct cpuinfo_x86 *c, int early) { char *v = c->x86_vendor_id; int i; @@ -155,12 +155,7 @@ void __init get_cpu_vendor(struct cpuinfo_x86 *c, int early) } -static int __init x86_fxsr_setup(char * s) -{ - disable_x86_fxsr = 1; - return 1; -} -__setup("nofxsr", x86_fxsr_setup); +boolean_param("nofxsr", disable_x86_fxsr); /* Standard macro to see if a specific flag is changeable */ @@ -186,14 +181,17 @@ static inline int flag_is_changeable_p(unsigned long flag) /* Probe for the CPUID instruction */ -static int __init have_cpuid_p(void) +static int __devinit have_cpuid_p(void) { return flag_is_changeable_p(X86_EFLAGS_ID); } /* Do minimum CPU detection early. Fields really needed: vendor, cpuid_level, family, model, mask, cache alignment. - The others are not touched to avoid unwanted side effects. */ + The others are not touched to avoid unwanted side effects. + + WARNING: this function is only called on the BP. Don't add code here + that is supposed to run on all CPUs. 
*/ static void __init early_cpu_detect(void) { struct cpuinfo_x86 *c = &boot_cpu_data; @@ -217,24 +215,18 @@ static void __init early_cpu_detect(void) cpuid(0x00000001, &tfms, &misc, &junk, &cap0); c->x86 = (tfms >> 8) & 15; c->x86_model = (tfms >> 4) & 15; - if (c->x86 == 0xf) { + if (c->x86 == 0xf) c->x86 += (tfms >> 20) & 0xff; + if (c->x86 >= 0x6) c->x86_model += ((tfms >> 16) & 0xF) << 4; - } c->x86_mask = tfms & 15; if (cap0 & (1<<19)) c->x86_cache_alignment = ((misc >> 8) & 0xff) * 8; c->x86_capability[0] = cap0; /* Added for Xen bootstrap */ } - - early_intel_workaround(c); - -#ifdef CONFIG_X86_HT - phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; -#endif } -void __init generic_identify(struct cpuinfo_x86 * c) +void __devinit generic_identify(struct cpuinfo_x86 * c) { u32 tfms, xlvl; int junk; @@ -279,9 +271,15 @@ void __init generic_identify(struct cpuinfo_x86 * c) get_model_name(c); /* Default name */ } } + + early_intel_workaround(c); + +#ifdef CONFIG_X86_HT + phys_proc_id[smp_processor_id()] = (cpuid_ebx(1) >> 24) & 0xff; +#endif } -static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) +static void __devinit squash_the_stupid_serial_number(struct cpuinfo_x86 *c) { if (cpu_has(c, X86_FEATURE_PN) && disable_x86_serial_nr ) { /* Disable processor serial number */ @@ -297,19 +295,14 @@ static void __init squash_the_stupid_serial_number(struct cpuinfo_x86 *c) } } -static int __init x86_serial_nr_setup(char *s) -{ - disable_x86_serial_nr = 0; - return 1; -} -__setup("serialnumber", x86_serial_nr_setup); +boolean_param("noserialnumber", disable_x86_serial_nr); /* * This does the hard work of actually picking apart the CPU stuff... */ -void __init identify_cpu(struct cpuinfo_x86 *c) +void __devinit identify_cpu(struct cpuinfo_x86 *c) { int i; @@ -319,7 +312,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) c->x86_model = c->x86_mask = 0; /* So far unknown... 
*/ c->x86_vendor_id[0] = '\0'; /* Unset */ c->x86_model_id[0] = '\0'; /* Unset */ - c->x86_num_cores = 1; + c->x86_max_cores = 1; memset(&c->x86_capability, 0, sizeof c->x86_capability); if (!have_cpuid_p()) { @@ -342,6 +335,7 @@ void __init identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_identify) { this_cpu->c_identify(c); + #ifdef NOISY_CAPS printk(KERN_DEBUG "CPU: After vendor identify, caps:"); for (i = 0; i < NCAPINTS; i++) @@ -397,12 +391,14 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } /* Now the feature flags better reflect actual CPU features! */ + #ifdef NOISY_CAPS printk(KERN_DEBUG "CPU: After all inits, caps:"); for (i = 0; i < NCAPINTS; i++) printk(" %08x", c->x86_capability[i]); printk("\n"); #endif + /* * On SMP, boot_cpu_data holds the common feature set between * all CPUs; so make sure that we indicate which features are @@ -416,68 +412,69 @@ void __init identify_cpu(struct cpuinfo_x86 *c) } /* Init Machine Check Exception if available. */ -#ifdef CONFIG_X86_MCE mcheck_init(c); + +#if 0 + if (c == &boot_cpu_data) + sysenter_setup(); + enable_sep_cpu(); #endif + + if (c == &boot_cpu_data) + mtrr_bp_init(); + else + mtrr_ap_init(); } #ifdef CONFIG_X86_HT -void __init detect_ht(struct cpuinfo_x86 *c) +void __devinit detect_ht(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; - int index_msb, tmp; + int index_msb, core_bits; int cpu = smp_processor_id(); + cpuid(1, &eax, &ebx, &ecx, &edx); + + c->apicid = phys_pkg_id((ebx >> 24) & 0xFF, 0); + if (!cpu_has(c, X86_FEATURE_HT) || cpu_has(c, X86_FEATURE_CMP_LEGACY)) return; - cpuid(1, &eax, &ebx, &ecx, &edx); smp_num_siblings = (ebx & 0xff0000) >> 16; if (smp_num_siblings == 1) { printk(KERN_INFO "CPU: Hyper-Threading is disabled\n"); } else if (smp_num_siblings > 1 ) { - index_msb = 31; if (smp_num_siblings > NR_CPUS) { printk(KERN_WARNING "CPU: Unsupported number of the siblings %d", smp_num_siblings); smp_num_siblings = 1; return; } - tmp = smp_num_siblings; - while ((tmp & 0x80000000 ) == 0) 
{ - tmp <<=1 ; - index_msb--; - } - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + + index_msb = get_count_order(smp_num_siblings); phys_proc_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); printk(KERN_INFO "CPU: Physical Processor ID: %d\n", phys_proc_id[cpu]); - smp_num_siblings = smp_num_siblings / c->x86_num_cores; + smp_num_siblings = smp_num_siblings / c->x86_max_cores; - tmp = smp_num_siblings; - index_msb = 31; - while ((tmp & 0x80000000) == 0) { - tmp <<=1 ; - index_msb--; - } + index_msb = get_count_order(smp_num_siblings) ; - if (smp_num_siblings & (smp_num_siblings - 1)) - index_msb++; + core_bits = get_count_order(c->x86_max_cores); - cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb); + cpu_core_id[cpu] = phys_pkg_id((ebx >> 24) & 0xFF, index_msb) & + ((1 << core_bits) - 1); - if (c->x86_num_cores > 1) + if (c->x86_max_cores > 1) printk(KERN_INFO "CPU: Processor Core ID: %d\n", cpu_core_id[cpu]); } } #endif -void __init print_cpu_info(struct cpuinfo_x86 *c) +void __devinit print_cpu_info(struct cpuinfo_x86 *c) { char *vendor = NULL; @@ -500,7 +497,7 @@ void __init print_cpu_info(struct cpuinfo_x86 *c) printk("\n"); } -cpumask_t cpu_initialized __initdata = CPU_MASK_NONE; +cpumask_t cpu_initialized __devinitdata = CPU_MASK_NONE; /* This is hacky. :) * We're emulating future behavior. @@ -537,7 +534,7 @@ void __init early_cpu_init(void) * and IDT. We reload them nevertheless, this function acts as a * 'CPU state barrier', nothing should get across. */ -void __init cpu_init (void) +void __devinit cpu_init(void) { int cpu = smp_processor_id(); struct tss_struct *t = &init_tss[cpu]; diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c index 9062b8786a..1aff3d0d4f 100644 --- a/xen/arch/x86/cpu/intel.c +++ b/xen/arch/x86/cpu/intel.c @@ -22,10 +22,10 @@ extern int trap_init_f00f_bug(void); /* * Alignment at which movsl is preferred for bulk memory copies. 
*/ -struct movsl_mask movsl_mask; +struct movsl_mask movsl_mask __read_mostly; #endif -void __init early_intel_workaround(struct cpuinfo_x86 *c) +void __devinit early_intel_workaround(struct cpuinfo_x86 *c) { if (c->x86_vendor != X86_VENDOR_INTEL) return; @@ -40,7 +40,7 @@ void __init early_intel_workaround(struct cpuinfo_x86 *c) * This is called before we do cpu ident work */ -int __init ppro_with_ram_bug(void) +int __devinit ppro_with_ram_bug(void) { /* Uses data from early_cpu_detect now */ if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && @@ -58,7 +58,7 @@ int __init ppro_with_ram_bug(void) * P4 Xeon errata 037 workaround. * Hardware prefetcher may cause stale data to be loaded into the cache. */ -static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) +static void __devinit Intel_errata_workarounds(struct cpuinfo_x86 *c) { unsigned long lo, hi; @@ -77,25 +77,22 @@ static void __init Intel_errata_workarounds(struct cpuinfo_x86 *c) /* * find out the number of processor cores on the die */ -static int __init num_cpu_cores(struct cpuinfo_x86 *c) +static int __devinit num_cpu_cores(struct cpuinfo_x86 *c) { - unsigned int eax; + unsigned int eax, ebx, ecx, edx; if (c->cpuid_level < 4) return 1; - __asm__("cpuid" - : "=a" (eax) - : "0" (4), "c" (0) - : "bx", "dx"); - + /* Intel has a non-standard dependency on %ecx for this CPUID level. 
*/ + cpuid_count(4, 0, &eax, &ebx, &ecx, &edx); if (eax & 0x1f) return ((eax >> 26) + 1); else return 1; } -static void __init init_intel(struct cpuinfo_x86 *c) +static void __devinit init_intel(struct cpuinfo_x86 *c) { unsigned int l2 = 0; char *p = NULL; @@ -157,7 +154,7 @@ static void __init init_intel(struct cpuinfo_x86 *c) if ( p ) strcpy(c->x86_model_id, p); - c->x86_num_cores = num_cpu_cores(c); + c->x86_max_cores = num_cpu_cores(c); detect_ht(c); @@ -182,10 +179,13 @@ static void __init init_intel(struct cpuinfo_x86 *c) } #endif - if (c->x86 == 15) + if (c->x86 == 15) set_bit(X86_FEATURE_P4, c->x86_capability); if (c->x86 == 6) set_bit(X86_FEATURE_P3, c->x86_capability); + if ((c->x86 == 0xf && c->x86_model >= 0x03) || + (c->x86 == 0x6 && c->x86_model >= 0x0e)) + set_bit(X86_FEATURE_CONSTANT_TSC, c->x86_capability); start_vmx(); } @@ -203,7 +203,7 @@ static unsigned int intel_size_cache(struct cpuinfo_x86 * c, unsigned int size) return size; } -static struct cpu_dev intel_cpu_dev __initdata = { +static struct cpu_dev intel_cpu_dev __devinitdata = { .c_vendor = "Intel", .c_ident = { "GenuineIntel" }, .c_models = { diff --git a/xen/arch/x86/cpu/mcheck/k7.c b/xen/arch/x86/cpu/mcheck/k7.c new file mode 100644 index 0000000000..456bb5ddde --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/k7.c @@ -0,0 +1,95 @@ +/* + * Athlon/Hammer specific Machine Check Exception Reporting + * (C) Copyright 2002 Dave Jones + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For AMD Athlon/Duron */ +static fastcall void k7_machine_check(struct cpu_user_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=1; i, Dave Jones + */ + +#include +#include +#include +#include +#include + +#include +#include + +#include "mce.h" + +int mce_disabled = 0; +int nr_mce_banks; + +/* Handle unconfigured int18 (should never happen) */ +static fastcall void unexpected_machine_check(struct cpu_user_regs * regs, long error_code) +{ + printk(KERN_ERR "CPU#%d: Unexpected int18 (Machine Check).\n", smp_processor_id()); +} + +/* Call the installed machine check handler for this CPU setup. */ +void fastcall (*machine_check_vector)(struct cpu_user_regs *, long error_code) = unexpected_machine_check; + +/* This has to be run for each processor */ +void mcheck_init(struct cpuinfo_x86 *c) +{ + if (mce_disabled==1) + return; + + switch (c->x86_vendor) { + case X86_VENDOR_AMD: + if (c->x86==6 || c->x86==15) + amd_mcheck_init(c); + break; + + case X86_VENDOR_INTEL: + if (c->x86==5) + intel_p5_mcheck_init(c); + if (c->x86==6) + intel_p6_mcheck_init(c); + if (c->x86==15) + intel_p4_mcheck_init(c); + break; + + case X86_VENDOR_CENTAUR: + if (c->x86==5) + winchip_mcheck_init(c); + break; + + default: + break; + } +} + +static int __init mcheck_disable(char *str) +{ + mce_disabled = 1; + return 0; +} + +static int __init mcheck_enable(char *str) +{ + mce_disabled = -1; + return 0; +} + +__setup("nomce", mcheck_disable); +__setup("mce", mcheck_enable); diff --git a/xen/arch/x86/cpu/mcheck/mce.h b/xen/arch/x86/cpu/mcheck/mce.h new file mode 100644 index 0000000000..0f1d38f734 --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/mce.h @@ -0,0 +1,14 @@ +#include + +void amd_mcheck_init(struct cpuinfo_x86 *c); +void intel_p4_mcheck_init(struct cpuinfo_x86 *c); +void intel_p5_mcheck_init(struct cpuinfo_x86 *c); +void intel_p6_mcheck_init(struct cpuinfo_x86 *c); +void winchip_mcheck_init(struct cpuinfo_x86 *c); + +/* Call the installed machine check handler for this CPU setup. 
*/ +extern fastcall void (*machine_check_vector)(struct cpu_user_regs *, long error_code); + +extern int mce_disabled __initdata; +extern int nr_mce_banks; + diff --git a/xen/arch/x86/cpu/mcheck/non-fatal.c b/xen/arch/x86/cpu/mcheck/non-fatal.c new file mode 100644 index 0000000000..42ea833166 --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/non-fatal.c @@ -0,0 +1,86 @@ +/* + * Non Fatal Machine Check Exception Reporting + * + * (C) Copyright 2002 Dave Jones. + * + * This file contains routines to check for non-fatal MCEs every 15s + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mce.h" + +static int firstbank; +static struct timer mce_timer; + +#define MCE_PERIOD MILLISECS(15000) + +static void mce_checkregs (void *info) +{ + u32 low, high; + int i; + + for (i=firstbank; i +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "mce.h" + +/* as supported by the P4/Xeon family */ +struct intel_mce_extended_msrs { + u32 eax; + u32 ebx; + u32 ecx; + u32 edx; + u32 esi; + u32 edi; + u32 ebp; + u32 esp; + u32 eflags; + u32 eip; + /* u32 *reserved[]; */ +}; + +static int mce_num_extended_msrs = 0; + + +#ifdef CONFIG_X86_MCE_P4THERMAL +static void unexpected_thermal_interrupt(struct cpu_user_regs *regs) +{ + printk(KERN_ERR "CPU%d: Unexpected LVT TMR interrupt!\n", + smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* P4/Xeon Thermal transition interrupt handler */ +static void intel_thermal_interrupt(struct cpu_user_regs *regs) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + static s_time_t next[NR_CPUS]; + + ack_APIC_irq(); + + if (NOW() > next[cpu]) + return; + + next[cpu] = NOW() + MILLISECS(5000); + rdmsr(MSR_IA32_THERM_STATUS, l, h); + if (l & 0x1) { + printk(KERN_EMERG "CPU%d: Temperature above threshold\n", cpu); + printk(KERN_EMERG "CPU%d: Running in modulated clock mode\n", + cpu); + add_taint(TAINT_MACHINE_CHECK); 
+ } else { + printk(KERN_INFO "CPU%d: Temperature/speed normal\n", cpu); + } +} + +/* Thermal interrupt handler for this CPU setup */ +static void (*vendor_thermal_interrupt)(struct cpu_user_regs *regs) = unexpected_thermal_interrupt; + +fastcall void smp_thermal_interrupt(struct cpu_user_regs *regs) +{ + irq_enter(smp_processor_id()); + vendor_thermal_interrupt(regs); + irq_exit(smp_processor_id()); +} + +/* P4/Xeon Thermal regulation detect and init */ +static void intel_init_thermal(struct cpuinfo_x86 *c) +{ + u32 l, h; + unsigned int cpu = smp_processor_id(); + + /* Thermal monitoring */ + if (!cpu_has(c, X86_FEATURE_ACPI)) + return; /* -ENODEV */ + + /* Clock modulation */ + if (!cpu_has(c, X86_FEATURE_ACC)) + return; /* -ENODEV */ + + /* first check if its enabled already, in which case there might + * be some SMM goo which handles it, so we can't even put a handler + * since it might be delivered via SMI already -zwanem. + */ + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + h = apic_read(APIC_LVTTHMR); + if ((l & (1<<3)) && (h & APIC_DM_SMI)) { + printk(KERN_DEBUG "CPU%d: Thermal monitoring handled by SMI\n", + cpu); + return; /* -EBUSY */ + } + + /* check whether a vector already exists, temporarily masked? */ + if (h & APIC_VECTOR_MASK) { + printk(KERN_DEBUG "CPU%d: Thermal LVT vector (%#x) already " + "installed\n", + cpu, (h & APIC_VECTOR_MASK)); + return; /* -EBUSY */ + } + + /* The temperature transition interrupt handler setup */ + h = THERMAL_APIC_VECTOR; /* our delivery vector */ + h |= (APIC_DM_FIXED | APIC_LVT_MASKED); /* we'll mask till we're ready */ + apic_write_around(APIC_LVTTHMR, h); + + rdmsr (MSR_IA32_THERM_INTERRUPT, l, h); + wrmsr (MSR_IA32_THERM_INTERRUPT, l | 0x03 , h); + + /* ok we're good to go... 
*/ + vendor_thermal_interrupt = intel_thermal_interrupt; + + rdmsr (MSR_IA32_MISC_ENABLE, l, h); + wrmsr (MSR_IA32_MISC_ENABLE, l | (1<<3), h); + + l = apic_read (APIC_LVTTHMR); + apic_write_around (APIC_LVTTHMR, l & ~APIC_LVT_MASKED); + printk (KERN_INFO "CPU%d: Thermal monitoring enabled\n", cpu); + return; +} +#endif /* CONFIG_X86_MCE_P4THERMAL */ + + +/* P4/Xeon Extended MCE MSR retrieval, return 0 if unsupported */ +static inline int intel_get_extended_msrs(struct intel_mce_extended_msrs *r) +{ + u32 h; + + if (mce_num_extended_msrs == 0) + goto done; + + rdmsr (MSR_IA32_MCG_EAX, r->eax, h); + rdmsr (MSR_IA32_MCG_EBX, r->ebx, h); + rdmsr (MSR_IA32_MCG_ECX, r->ecx, h); + rdmsr (MSR_IA32_MCG_EDX, r->edx, h); + rdmsr (MSR_IA32_MCG_ESI, r->esi, h); + rdmsr (MSR_IA32_MCG_EDI, r->edi, h); + rdmsr (MSR_IA32_MCG_EBP, r->ebp, h); + rdmsr (MSR_IA32_MCG_ESP, r->esp, h); + rdmsr (MSR_IA32_MCG_EFLAGS, r->eflags, h); + rdmsr (MSR_IA32_MCG_EIP, r->eip, h); + + /* can we rely on kmalloc to do a dynamic + * allocation for the reserved registers? + */ +done: + return mce_num_extended_msrs; +} + +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + struct intel_mce_extended_msrs dbg; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + if (intel_get_extended_msrs(&dbg)) { + printk (KERN_DEBUG "CPU %d: EIP: %08x EFLAGS: %08x\n", + smp_processor_id(), dbg.eip, dbg.eflags); + printk (KERN_DEBUG "\teax: %08x ebx: %08x ecx: %08x edx: %08x\n", + dbg.eax, dbg.ebx, dbg.ecx, dbg.edx); + printk (KERN_DEBUG "\tesi: %08x edi: %08x ebp: %08x esp: %08x\n", + dbg.esi, dbg.edi, dbg.ebp, dbg.esp); + } + + for (i=0; i> 16) & 0xff; + printk (KERN_INFO "CPU%d: Intel P4/Xeon Extended MCE MSRs (%d)" + " available\n", + smp_processor_id(), mce_num_extended_msrs); + +#ifdef CONFIG_X86_MCE_P4THERMAL + /* Check for P4/Xeon Thermal monitor */ + intel_init_thermal(c); +#endif + } +} diff --git a/xen/arch/x86/cpu/mcheck/p5.c b/xen/arch/x86/cpu/mcheck/p5.c new file mode 100644 index 0000000000..ac952af082 --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/p5.c @@ -0,0 +1,52 @@ +/* + * P5 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for Pentium class Intel */ +static fastcall void pentium_machine_check(struct cpu_user_regs * regs, long error_code) +{ + u32 loaddr, hi, lotype; + rdmsr(MSR_IA32_P5_MC_ADDR, loaddr, hi); + rdmsr(MSR_IA32_P5_MC_TYPE, lotype, hi); + printk(KERN_EMERG "CPU#%d: Machine Check Exception: 0x%8X (type 0x%8X).\n", smp_processor_id(), loaddr, lotype); + if(lotype&(1<<5)) + printk(KERN_EMERG "CPU#%d: Possible thermal failure (CPU on fire ?).\n", smp_processor_id()); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting for processors with Intel style MCE */ +void intel_p5_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 l, h; + + /*Check for MCE support */ + if( !cpu_has(c, X86_FEATURE_MCE) ) + return; + + /* Default P5 to off as its often misconnected */ + if(mce_disabled != -1) + return; + machine_check_vector = 
pentium_machine_check; + wmb(); + + /* Read registers before enabling */ + rdmsr(MSR_IA32_P5_MC_ADDR, l, h); + rdmsr(MSR_IA32_P5_MC_TYPE, l, h); + printk(KERN_INFO "Intel old style machine check architecture supported.\n"); + + /* Enable MCE */ + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Intel old style machine check reporting enabled on CPU#%d.\n", smp_processor_id()); +} diff --git a/xen/arch/x86/cpu/mcheck/p6.c b/xen/arch/x86/cpu/mcheck/p6.c new file mode 100644 index 0000000000..cfa500d331 --- /dev/null +++ b/xen/arch/x86/cpu/mcheck/p6.c @@ -0,0 +1,118 @@ +/* + * P6 specific Machine Check Exception Reporting + * (C) Copyright 2002 Alan Cox + */ + +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine Check Handler For PII/PIII */ +static fastcall void intel_machine_check(struct cpu_user_regs * regs, long error_code) +{ + int recover=1; + u32 alow, ahigh, high, low; + u32 mcgstl, mcgsth; + int i; + + rdmsr (MSR_IA32_MCG_STATUS, mcgstl, mcgsth); + if (mcgstl & (1<<0)) /* Recoverable ? 
*/ + recover=0; + + printk (KERN_EMERG "CPU %d: Machine Check Exception: %08x%08x\n", + smp_processor_id(), mcgsth, mcgstl); + + for (i=0; i + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "mce.h" + +/* Machine check handler for WinChip C6 */ +static fastcall void winchip_machine_check(struct cpu_user_regs * regs, long error_code) +{ + printk(KERN_EMERG "CPU0: Machine Check Exception.\n"); + add_taint(TAINT_MACHINE_CHECK); +} + +/* Set up machine check reporting on the Winchip C6 series */ +void winchip_mcheck_init(struct cpuinfo_x86 *c) +{ + u32 lo, hi; + machine_check_vector = winchip_machine_check; + wmb(); + rdmsr(MSR_IDT_FCR1, lo, hi); + lo|= (1<<2); /* Enable EIERRINT (int 18 MCE) */ + lo&= ~(1<<4); /* Enable MCE */ + wrmsr(MSR_IDT_FCR1, lo, hi); + set_in_cr4(X86_CR4_MCE); + printk(KERN_INFO "Winchip machine check reporting enabled on CPU#0.\n"); +} diff --git a/xen/arch/x86/mtrr/amd.c b/xen/arch/x86/cpu/mtrr/amd.c similarity index 100% rename from xen/arch/x86/mtrr/amd.c rename to xen/arch/x86/cpu/mtrr/amd.c diff --git a/xen/arch/x86/mtrr/centaur.c b/xen/arch/x86/cpu/mtrr/centaur.c similarity index 98% rename from xen/arch/x86/mtrr/centaur.c rename to xen/arch/x86/cpu/mtrr/centaur.c index efebacc191..72cc20298f 100644 --- a/xen/arch/x86/mtrr/centaur.c +++ b/xen/arch/x86/cpu/mtrr/centaur.c @@ -86,6 +86,8 @@ static void centaur_set_mcr(unsigned int reg, unsigned long base, centaur_mcr[reg].low = low; wrmsr(MSR_IDT_MCR0 + reg, low, high); } + +#if 0 /* * Initialise the later (saner) Winchip MCR variant. 
In this version * the BIOS can pass us the registers it has used (but not their values) @@ -183,6 +185,7 @@ centaur_mcr_init(void) set_mtrr_done(&ctxt); } +#endif static int centaur_validate_add_page(unsigned long base, unsigned long size, unsigned int type) @@ -203,7 +206,7 @@ static int centaur_validate_add_page(unsigned long base, static struct mtrr_ops centaur_mtrr_ops = { .vendor = X86_VENDOR_CENTAUR, - .init = centaur_mcr_init, +// .init = centaur_mcr_init, .set = centaur_set_mcr, .get = centaur_get_mcr, .get_free_region = centaur_get_free_region, diff --git a/xen/arch/x86/mtrr/cyrix.c b/xen/arch/x86/cpu/mtrr/cyrix.c similarity index 97% rename from xen/arch/x86/mtrr/cyrix.c rename to xen/arch/x86/cpu/mtrr/cyrix.c index ca76bb7503..10eebf15d8 100644 --- a/xen/arch/x86/mtrr/cyrix.c +++ b/xen/arch/x86/cpu/mtrr/cyrix.c @@ -218,12 +218,12 @@ typedef struct { mtrr_type type; } arr_state_t; -arr_state_t arr_state[8] __initdata = { +static arr_state_t arr_state[8] __devinitdata = { {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL}, {0UL, 0UL, 0UL} }; -unsigned char ccr_state[7] __initdata = { 0, 0, 0, 0, 0, 0, 0 }; +static unsigned char ccr_state[7] __devinitdata = { 0, 0, 0, 0, 0, 0, 0 }; static void cyrix_set_all(void) { @@ -243,6 +243,7 @@ static void cyrix_set_all(void) post_set(); } +#if 0 /* * On Cyrix 6x86(MX) and M II the ARR3 is special: it has connection * with the SMM (System Management Mode) mode. 
So we need the following: @@ -341,10 +342,11 @@ cyrix_arr_init(void) if (ccrc[6]) printk(KERN_INFO "mtrr: ARR3 was write protected, unprotected\n"); } +#endif static struct mtrr_ops cyrix_mtrr_ops = { .vendor = X86_VENDOR_CYRIX, - .init = cyrix_arr_init, +// .init = cyrix_arr_init, .set_all = cyrix_set_all, .set = cyrix_set_arr, .get = cyrix_get_arr, diff --git a/xen/arch/x86/mtrr/generic.c b/xen/arch/x86/cpu/mtrr/generic.c similarity index 85% rename from xen/arch/x86/mtrr/generic.c rename to xen/arch/x86/cpu/mtrr/generic.c index 025d033a64..1382cf30f9 100644 --- a/xen/arch/x86/mtrr/generic.c +++ b/xen/arch/x86/cpu/mtrr/generic.c @@ -19,8 +19,7 @@ struct mtrr_state { }; static unsigned long smp_changes_mask; -struct mtrr_state mtrr_state = {}; - +static struct mtrr_state mtrr_state = {}; /* Get the MSR pair relating to a var range */ static void __init @@ -68,13 +67,6 @@ void __init get_mtrr_state(void) mtrr_state.enabled = (lo & 0xc00) >> 10; } -/* Free resources associated with a struct mtrr_state */ -void __init finalize_mtrr_state(void) -{ - xfree(mtrr_state.var_ranges); - mtrr_state.var_ranges = NULL; -} - /* Some BIOS's are fucked and don't set all MTRRs the same! */ void __init mtrr_state_warn(void) { @@ -92,6 +84,16 @@ void __init mtrr_state_warn(void) printk(KERN_INFO "mtrr: corrected configuration.\n"); } +/* Doesn't attempt to pass an error out to MTRR users + because it's quite complicated in some cases and probably not + worth it because the best error handling is to ignore it. */ +void mtrr_wrmsr(unsigned msr, unsigned a, unsigned b) +{ + if (wrmsr_safe(msr, a, b) < 0) + printk(KERN_ERR + "MTRR: CPU %u: Writing MSR %x to %x:%x failed\n", + smp_processor_id(), msr, a, b); +} int generic_get_free_region(unsigned long base, unsigned long size) /* [SUMMARY] Get a free MTRR. 
@@ -114,8 +116,8 @@ int generic_get_free_region(unsigned long base, unsigned long size) return -ENOSPC; } -void generic_get_mtrr(unsigned int reg, unsigned long *base, - unsigned int *size, mtrr_type * type) +static void generic_get_mtrr(unsigned int reg, unsigned long *base, + unsigned int *size, mtrr_type * type) { unsigned int mask_lo, mask_hi, base_lo, base_hi; @@ -150,14 +152,14 @@ static int set_fixed_ranges(mtrr_type * frs) rdmsr(MTRRfix64K_00000_MSR, lo, hi); if (p[0] != lo || p[1] != hi) { - wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); + mtrr_wrmsr(MTRRfix64K_00000_MSR, p[0], p[1]); changed = TRUE; } for (i = 0; i < 2; i++) { rdmsr(MTRRfix16K_80000_MSR + i, lo, hi); if (p[2 + i * 2] != lo || p[3 + i * 2] != hi) { - wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], + mtrr_wrmsr(MTRRfix16K_80000_MSR + i, p[2 + i * 2], p[3 + i * 2]); changed = TRUE; } @@ -166,7 +168,7 @@ static int set_fixed_ranges(mtrr_type * frs) for (i = 0; i < 8; i++) { rdmsr(MTRRfix4K_C0000_MSR + i, lo, hi); if (p[6 + i * 2] != lo || p[7 + i * 2] != hi) { - wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], + mtrr_wrmsr(MTRRfix4K_C0000_MSR + i, p[6 + i * 2], p[7 + i * 2]); changed = TRUE; } @@ -183,16 +185,18 @@ static int set_mtrr_var_ranges(unsigned int index, struct mtrr_var_range *vr) rdmsr(MTRRphysBase_MSR(index), lo, hi); if ((vr->base_lo & 0xfffff0ffUL) != (lo & 0xfffff0ffUL) - || (vr->base_hi & 0xfUL) != (hi & 0xfUL)) { - wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); + || (vr->base_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + mtrr_wrmsr(MTRRphysBase_MSR(index), vr->base_lo, vr->base_hi); changed = TRUE; } rdmsr(MTRRphysMask_MSR(index), lo, hi); if ((vr->mask_lo & 0xfffff800UL) != (lo & 0xfffff800UL) - || (vr->mask_hi & 0xfUL) != (hi & 0xfUL)) { - wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); + || (vr->mask_hi & (size_and_mask >> (32 - PAGE_SHIFT))) != + (hi & (size_and_mask >> (32 - PAGE_SHIFT)))) { + 
mtrr_wrmsr(MTRRphysMask_MSR(index), vr->mask_lo, vr->mask_hi); changed = TRUE; } return changed; @@ -230,7 +234,7 @@ static unsigned long set_mtrr_state(u32 deftype_lo, u32 deftype_hi) static unsigned long cr4 = 0; static u32 deftype_lo, deftype_hi; -static spinlock_t set_atomicity_lock = SPIN_LOCK_UNLOCKED; +static DEFINE_SPINLOCK(set_atomicity_lock); /* * Since we are disabling the cache don't allow any interrupts - they @@ -267,7 +271,7 @@ static void prepare_set(void) rdmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); /* Disable MTRRs, and set the default type to uncached */ - wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo & 0xf300UL, deftype_hi); } static void post_set(void) @@ -276,7 +280,7 @@ static void post_set(void) local_flush_tlb(); /* Intel (P6) standard MTRRs */ - wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); + mtrr_wrmsr(MTRRdefType_MSR, deftype_lo, deftype_hi); /* Enable caches */ write_cr0(read_cr0() & 0xbfffffff); @@ -323,6 +327,9 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, */ { unsigned long flags; + struct mtrr_var_range *vr; + + vr = &mtrr_state.var_ranges[reg]; local_irq_save(flags); prepare_set(); @@ -330,12 +337,16 @@ static void generic_set_mtrr(unsigned int reg, unsigned long base, if (size == 0) { /* The invalid bit is kept in the mask, so we simply clear the relevant mask register to disable a range. 
*/ - wrmsr(MTRRphysMask_MSR(reg), 0, 0); + mtrr_wrmsr(MTRRphysMask_MSR(reg), 0, 0); + memset(vr, 0, sizeof(struct mtrr_var_range)); } else { - wrmsr(MTRRphysBase_MSR(reg), base << PAGE_SHIFT | type, - (base & size_and_mask) >> (32 - PAGE_SHIFT)); - wrmsr(MTRRphysMask_MSR(reg), -size << PAGE_SHIFT | 0x800, - (-size & size_and_mask) >> (32 - PAGE_SHIFT)); + vr->base_lo = base << PAGE_SHIFT | type; + vr->base_hi = (base & size_and_mask) >> (32 - PAGE_SHIFT); + vr->mask_lo = -size << PAGE_SHIFT | 0x800; + vr->mask_hi = (-size & size_and_mask) >> (32 - PAGE_SHIFT); + + mtrr_wrmsr(MTRRphysBase_MSR(reg), vr->base_lo, vr->base_hi); + mtrr_wrmsr(MTRRphysMask_MSR(reg), vr->mask_lo, vr->mask_hi); } post_set(); @@ -382,7 +393,7 @@ int generic_validate_add_page(unsigned long base, unsigned long size, unsigned i } -int generic_have_wrcomb(void) +static int generic_have_wrcomb(void) { unsigned long config, dummy; rdmsr(MTRRcap_MSR, config, dummy); diff --git a/xen/arch/x86/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c similarity index 86% rename from xen/arch/x86/mtrr/main.c rename to xen/arch/x86/cpu/mtrr/main.c index 48ea94ae1f..4737caf8aa 100644 --- a/xen/arch/x86/mtrr/main.c +++ b/xen/arch/x86/cpu/mtrr/main.c @@ -42,17 +42,18 @@ #include #include "mtrr.h" -#define MTRR_VERSION "2.0 (20020519)" - /* No blocking mutexes in Xen. Spin instead. 
*/ #define DECLARE_MUTEX(_m) spinlock_t _m = SPIN_LOCK_UNLOCKED #define down(_m) spin_lock(_m) #define up(_m) spin_unlock(_m) +#define lock_cpu_hotplug() ((void)0) +#define unlock_cpu_hotplug() ((void)0) +#define dump_stack() ((void)0) u32 num_var_ranges = 0; unsigned int *usage_table; -static DECLARE_MUTEX(main_lock); +static DECLARE_MUTEX(mtrr_sem); u32 size_or_mask, size_and_mask; @@ -60,10 +61,6 @@ static struct mtrr_ops * mtrr_ops[X86_VENDOR_NUM] = {}; struct mtrr_ops * mtrr_if = NULL; -__initdata char *mtrr_if_name[] = { - "none", "Intel", "AMD K6", "Cyrix ARR", "Centaur MCR" -}; - static void set_mtrr(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); @@ -98,7 +95,7 @@ static int have_wrcomb(void) } /* This function returns the number of variable MTRRs */ -void __init set_num_var_ranges(void) +static void __init set_num_var_ranges(void) { unsigned long config = 0, dummy; @@ -329,8 +326,10 @@ int mtrr_add_page(unsigned long base, unsigned long size, error = -EINVAL; + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); /* Search for existing MTRR */ - down(&main_lock); + down(&mtrr_sem); for (i = 0; i < num_var_ranges; ++i) { mtrr_if->get(i, &lbase, &lsize, &ltype); if (base >= lbase + lsize) @@ -368,10 +367,24 @@ int mtrr_add_page(unsigned long base, unsigned long size, printk(KERN_INFO "mtrr: no more MTRRs available\n"); error = i; out: - up(&main_lock); + up(&mtrr_sem); + unlock_cpu_hotplug(); return error; } +static int mtrr_check(unsigned long base, unsigned long size) +{ + if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { + printk(KERN_WARNING + "mtrr: size and base must be multiples of 4 kiB\n"); + printk(KERN_DEBUG + "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + dump_stack(); + return -1; + } + return 0; +} + /** * mtrr_add - Add a memory type region * @base: Physical base address of region @@ -412,11 +425,8 @@ int mtrr_add(unsigned long base, unsigned long size, unsigned int type, char increment)
{ - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_WARNING "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_add_page(base >> PAGE_SHIFT, size >> PAGE_SHIFT, type, increment); } @@ -448,7 +458,9 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) return -ENXIO; max = num_var_ranges; - down(&main_lock); + /* No CPU hotplug when we change MTRR entries */ + lock_cpu_hotplug(); + down(&mtrr_sem); if (reg < 0) { /* Search for existing MTRR */ for (i = 0; i < max; ++i) { @@ -487,7 +499,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) set_mtrr(reg, 0, 0, 0); error = reg; out: - up(&main_lock); + up(&mtrr_sem); + unlock_cpu_hotplug(); return error; } /** @@ -508,11 +521,8 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size) int mtrr_del(int reg, unsigned long base, unsigned long size) { - if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) { - printk(KERN_INFO "mtrr: size and base must be multiples of 4 kiB\n"); - printk(KERN_DEBUG "mtrr: size: 0x%lx base: 0x%lx\n", size, base); + if (mtrr_check(base, size)) return -EINVAL; - } return mtrr_del_page(reg, base >> PAGE_SHIFT, size >> PAGE_SHIFT); } @@ -534,21 +544,9 @@ static void __init init_ifs(void) centaur_init_mtrr(); } -static void __init init_other_cpus(void) -{ - if (use_intel()) - get_mtrr_state(); - - /* bring up the other processors */ - set_mtrr(~0U,0,0,0); - - if (use_intel()) { - finalize_mtrr_state(); - mtrr_state_warn(); - } -} - - +/* The suspend/resume methods are only for CPU without MTRR. 
CPU using generic + * MTRR driver doesn't require this + */ struct mtrr_value { mtrr_type ltype; unsigned long lbase; @@ -556,13 +554,13 @@ struct mtrr_value { }; /** - * mtrr_init - initialize mtrrs on the boot CPU + * mtrr_bp_init - initialize mtrrs on the boot CPU * * This needs to be called early; before any of the other CPUs are * initialized (i.e. before smp_init()). * */ -static int __init mtrr_init(void) +void __init mtrr_bp_init(void) { init_ifs(); @@ -570,40 +568,29 @@ static int __init mtrr_init(void) mtrr_if = &generic_mtrr_ops; size_or_mask = 0xff000000; /* 36 bits */ size_and_mask = 0x00f00000; - - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_AMD: - /* The original Athlon docs said that - total addressable memory is 44 bits wide. - It was not really clear whether its MTRRs - follow this or not. (Read: 44 or 36 bits). - However, "x86-64_overview.pdf" explicitly - states that "previous implementations support - 36 bit MTRRs" and also provides a way to - query the width (in bits) of the physical - addressable memory on the Hammer family. - */ - if (boot_cpu_data.x86 == 15 - && (cpuid_eax(0x80000000) >= 0x80000008)) { - u32 phys_addr; - phys_addr = cpuid_eax(0x80000008) & 0xff; - size_or_mask = - ~((1 << (phys_addr - PAGE_SHIFT)) - 1); - size_and_mask = ~size_or_mask & 0xfff00000; - } - /* Athlon MTRRs use an Intel-compatible interface for - * getting and setting */ - break; - case X86_VENDOR_CENTAUR: - if (boot_cpu_data.x86 == 6) { - /* VIA Cyrix family have Intel style MTRRs, but don't support PAE */ - size_or_mask = 0xfff00000; /* 32 bits */ - size_and_mask = 0; - } - break; - - default: - break; + + /* This is an AMD specific MSR, but we assume(hope?) that + Intel will implement it too when they extend the address + bus of the Xeon.
*/ + if (cpuid_eax(0x80000000) >= 0x80000008) { + u32 phys_addr; + phys_addr = cpuid_eax(0x80000008) & 0xff; + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86_vendor == X86_VENDOR_INTEL && + boot_cpu_data.x86 == 0xF && + boot_cpu_data.x86_model == 0x3 && + (boot_cpu_data.x86_mask == 0x3 || + boot_cpu_data.x86_mask == 0x4)) + phys_addr = 36; + + size_or_mask = ~((1 << (phys_addr - PAGE_SHIFT)) - 1); + size_and_mask = ~size_or_mask & 0xfff00000; + } else if (boot_cpu_data.x86_vendor == X86_VENDOR_CENTAUR && + boot_cpu_data.x86 == 6) { + /* VIA C* family have Intel style MTRRs, but + don't support PAE */ + size_or_mask = 0xfff00000; /* 32 bits */ + size_and_mask = 0; } } else { switch (boot_cpu_data.x86_vendor) { @@ -633,15 +620,42 @@ static int __init mtrr_init(void) break; } } - printk(KERN_INFO "mtrr: v%s\n",MTRR_VERSION); if (mtrr_if) { set_num_var_ranges(); init_table(); - init_other_cpus(); - return 0; + if (use_intel()) + get_mtrr_state(); } - return -ENXIO; } -__initcall(mtrr_init); +void mtrr_ap_init(void) +{ + unsigned long flags; + + if (!mtrr_if || !use_intel()) + return; + /* + * Ideally we should hold mtrr_sem here to keep mtrr entries from changing, + * but this routine is called at cpu boot time, where holding the lock + * breaks it. This routine is called in two cases: 1. very early during + * software resume, when there are absolutely no mtrr entry changes; + * 2. cpu hotadd time.
We let mtrr_add/del_page hold cpuhotplug lock to + * prevent mtrr entry changes + */ + local_irq_save(flags); + + mtrr_if->set_all(); + + local_irq_restore(flags); +} + +static int __init mtrr_init_finialize(void) +{ + if (!mtrr_if) + return 0; + if (use_intel()) + mtrr_state_warn(); + return 0; +} +__initcall(mtrr_init_finialize); diff --git a/xen/arch/x86/mtrr/mtrr.h b/xen/arch/x86/cpu/mtrr/mtrr.h similarity index 95% rename from xen/arch/x86/mtrr/mtrr.h rename to xen/arch/x86/cpu/mtrr/mtrr.h index 0d10c115db..99c9f26820 100644 --- a/xen/arch/x86/mtrr/mtrr.h +++ b/xen/arch/x86/cpu/mtrr/mtrr.h @@ -37,7 +37,7 @@ typedef u8 mtrr_type; struct mtrr_ops { u32 vendor; u32 use_intel_if; - void (*init)(void); +// void (*init)(void); void (*set)(unsigned int reg, unsigned long base, unsigned long size, mtrr_type type); void (*set_all)(void); @@ -57,7 +57,6 @@ extern int generic_validate_add_page(unsigned long base, unsigned long size, extern struct mtrr_ops generic_mtrr_ops; -extern int generic_have_wrcomb(void); extern int positive_have_wrcomb(void); /* library functions for processor-specific routines */ @@ -92,8 +91,7 @@ extern struct mtrr_ops * mtrr_if; extern unsigned int num_var_ranges; -void finalize_mtrr_state(void); void mtrr_state_warn(void); char *mtrr_attrib_to_str(int x); +void mtrr_wrmsr(unsigned, unsigned, unsigned); -extern char * mtrr_if_name[]; diff --git a/xen/arch/x86/mtrr/state.c b/xen/arch/x86/cpu/mtrr/state.c similarity index 93% rename from xen/arch/x86/mtrr/state.c rename to xen/arch/x86/cpu/mtrr/state.c index 7939100833..92d8bace61 100644 --- a/xen/arch/x86/mtrr/state.c +++ b/xen/arch/x86/cpu/mtrr/state.c @@ -42,7 +42,7 @@ void set_mtrr_cache_disable(struct set_mtrr_context *ctxt) { if (use_intel()) /* Disable MTRRs, and set the default type to uncached */ - wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo & 0xf300UL, ctxt->deftype_hi); else if (is_cpu(CYRIX)) /* Cyrix ARRs - everything else were 
excluded at the top */ @@ -60,7 +60,7 @@ void set_mtrr_done(struct set_mtrr_context *ctxt) /* Restore MTRRdefType */ if (use_intel()) /* Intel (P6) standard MTRRs */ - wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); + mtrr_wrmsr(MTRRdefType_MSR, ctxt->deftype_lo, ctxt->deftype_hi); else /* Cyrix ARRs - everything else was excluded at the top */ setCx86(CX86_CCR3, ctxt->ccr3); diff --git a/xen/arch/x86/dom0_ops.c b/xen/arch/x86/dom0_ops.c index 600735f7db..69fcb76aef 100644 --- a/xen/arch/x86/dom0_ops.c +++ b/xen/arch/x86/dom0_ops.c @@ -26,7 +26,7 @@ #include #include -#include "mtrr/mtrr.h" +#include "cpu/mtrr/mtrr.h" #define TRC_DOM0OP_ENTER_BASE 0x00020000 #define TRC_DOM0OP_LEAVE_BASE 0x00030000 @@ -39,13 +39,13 @@ static unsigned long msr_hi; static void write_msr_for(void *unused) { if ( ((1 << smp_processor_id()) & msr_cpu_mask) ) - (void)wrmsr_user(msr_addr, msr_lo, msr_hi); + (void)wrmsr_safe(msr_addr, msr_lo, msr_hi); } static void read_msr_for(void *unused) { if ( ((1 << smp_processor_id()) & msr_cpu_mask) ) - (void)rdmsr_user(msr_addr, msr_lo, msr_hi); + (void)rdmsr_safe(msr_addr, msr_lo, msr_hi); } long arch_do_dom0_op(struct dom0_op *op, struct dom0_op *u_dom0_op) @@ -182,7 +182,7 @@ long arch_do_dom0_op(struct dom0_op *op, struct dom0_op *u_dom0_op) dom0_physinfo_t *pi = &op->u.physinfo; pi->threads_per_core = smp_num_siblings; - pi->cores_per_socket = boot_cpu_data.x86_num_cores; + pi->cores_per_socket = boot_cpu_data.x86_max_cores; pi->sockets_per_node = num_online_cpus() / (pi->threads_per_core * pi->cores_per_socket); pi->nr_nodes = 1; diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c index 1c83f4a79a..830d03921a 100644 --- a/xen/arch/x86/hvm/svm/svm.c +++ b/xen/arch/x86/hvm/svm/svm.c @@ -69,8 +69,6 @@ extern void do_nmi(struct cpu_user_regs *, unsigned long); extern int inst_copy_from_guest(unsigned char *buf, unsigned long guest_eip, int inst_len); extern asmlinkage void do_IRQ(struct cpu_user_regs *); -extern void 
smp_apic_timer_interrupt(struct cpu_user_regs *); -extern void timer_interrupt(int, void *, struct cpu_user_regs *); extern void send_pio_req(struct cpu_user_regs *regs, unsigned long port, unsigned long count, int size, long value, int dir, int pvalid); extern int svm_instrlen(struct cpu_user_regs *regs, int mode); @@ -1761,7 +1759,7 @@ static inline void svm_do_msr_access(struct vcpu *v, struct cpu_user_regs *regs) default: if (long_mode_do_msr_read(regs)) goto done; - rdmsr_user(regs->ecx, regs->eax, regs->edx); + rdmsr_safe(regs->ecx, regs->eax, regs->edx); break; } } diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c index 05517ae291..7348181fe9 100644 --- a/xen/arch/x86/hvm/vmx/vmx.c +++ b/xen/arch/x86/hvm/vmx/vmx.c @@ -1568,7 +1568,7 @@ static inline void vmx_do_msr_read(struct cpu_user_regs *regs) default: if(long_mode_do_msr_read(regs)) return; - rdmsr_user(regs->ecx, regs->eax, regs->edx); + rdmsr_safe(regs->ecx, regs->eax, regs->edx); break; } @@ -1658,13 +1658,12 @@ static inline void vmx_vmexit_do_extint(struct cpu_user_regs *regs) int error; asmlinkage void do_IRQ(struct cpu_user_regs *); - void smp_apic_timer_interrupt(struct cpu_user_regs *); - void timer_interrupt(int, void *, struct cpu_user_regs *); - void smp_event_check_interrupt(void); - void smp_invalidate_interrupt(void); - void smp_call_function_interrupt(void); - void smp_spurious_interrupt(struct cpu_user_regs *regs); - void smp_error_interrupt(struct cpu_user_regs *regs); + fastcall void smp_apic_timer_interrupt(struct cpu_user_regs *); + fastcall void smp_event_check_interrupt(void); + fastcall void smp_invalidate_interrupt(void); + fastcall void smp_call_function_interrupt(void); + fastcall void smp_spurious_interrupt(struct cpu_user_regs *regs); + fastcall void smp_error_interrupt(struct cpu_user_regs *regs); if ((error = __vmread(VM_EXIT_INTR_INFO, &vector)) && !(vector & INTR_INFO_VALID_MASK)) diff --git a/xen/arch/x86/i8259.c b/xen/arch/x86/i8259.c index 
38fb41c467..39ff8ce3cb 100644 --- a/xen/arch/x86/i8259.c +++ b/xen/arch/x86/i8259.c @@ -68,9 +68,10 @@ BUILD_SMP_INTERRUPT(call_function_interrupt,CALL_FUNCTION_VECTOR) * overflow. Linux uses the local APIC timer interrupt to get * a much simpler SMP time architecture: */ -BUILD_SMP_TIMER_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) +BUILD_SMP_INTERRUPT(apic_timer_interrupt,LOCAL_TIMER_VECTOR) BUILD_SMP_INTERRUPT(error_interrupt,ERROR_APIC_VECTOR) BUILD_SMP_INTERRUPT(spurious_interrupt,SPURIOUS_APIC_VECTOR) +BUILD_SMP_INTERRUPT(thermal_interrupt,THERMAL_APIC_VECTOR) #define IRQ(x,y) \ IRQ##x##y##_interrupt @@ -391,6 +392,7 @@ void __init init_IRQ(void) /* IPI vectors for APIC spurious and error interrupts. */ set_intr_gate(SPURIOUS_APIC_VECTOR, spurious_interrupt); set_intr_gate(ERROR_APIC_VECTOR, error_interrupt); + set_intr_gate(THERMAL_APIC_VECTOR, thermal_interrupt); /* Set the clock to HZ Hz */ #define CLOCK_TICK_RATE 1193180 /* crystal freq (Hz) */ diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c index f586de2ad1..bd981147d8 100644 --- a/xen/arch/x86/setup.c +++ b/xen/arch/x86/setup.c @@ -440,7 +440,7 @@ void __init __start_xen(multiboot_info_t *mbi) { max_cpus = 0; smp_num_siblings = 1; - boot_cpu_data.x86_num_cores = 1; + boot_cpu_data.x86_max_cores = 1; } smp_prepare_cpus(max_cpus); diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c index b0782ec5d7..3154473f26 100644 --- a/xen/arch/x86/smp.c +++ b/xen/arch/x86/smp.c @@ -188,7 +188,7 @@ static spinlock_t flush_lock = SPIN_LOCK_UNLOCKED; static cpumask_t flush_cpumask; static unsigned long flush_va; -asmlinkage void smp_invalidate_interrupt(void) +fastcall void smp_invalidate_interrupt(void) { ack_APIC_irq(); perfc_incrc(ipis); @@ -339,13 +339,13 @@ void smp_send_stop(void) local_irq_enable(); } -asmlinkage void smp_event_check_interrupt(void) +fastcall void smp_event_check_interrupt(struct cpu_user_regs *regs) { ack_APIC_irq(); perfc_incrc(ipis); } -asmlinkage void 
smp_call_function_interrupt(void) +fastcall void smp_call_function_interrupt(struct cpu_user_regs *regs) { void (*func)(void *info) = call_data->func; void *info = call_data->info; diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c index 98e07ea4bf..5123a18fee 100644 --- a/xen/arch/x86/smpboot.c +++ b/xen/arch/x86/smpboot.c @@ -53,8 +53,6 @@ static int _foo; #define set_kernel_exec(x,y) (_foo=0) #define alloc_bootmem_low_pages(x) __va(0x90000) /* trampoline address */ -int tainted; -#define TAINT_UNSAFE_SMP 0 /* Set if we find a B stepping CPU */ static int __initdata smp_b_stepping; @@ -1110,7 +1108,7 @@ static void __init smp_boot_cpus(unsigned int max_cpus) smp_num_siblings = siblings; } - if (c->x86_num_cores > 1) { + if (c->x86_max_cores > 1) { for (i = 0; i < NR_CPUS; i++) { if (!cpu_isset(i, cpu_callout_map)) continue; diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c index f0fb8e5e28..f5821ef195 100644 --- a/xen/arch/x86/traps.c +++ b/xen/arch/x86/traps.c @@ -981,26 +981,26 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) { #ifdef CONFIG_X86_64 case MSR_FS_BASE: - if ( wrmsr_user(MSR_FS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_FS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.fs_base = ((u64)regs->edx << 32) | regs->eax; break; case MSR_GS_BASE: - if ( wrmsr_user(MSR_GS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_GS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.gs_base_kernel = ((u64)regs->edx << 32) | regs->eax; break; case MSR_SHADOW_GS_BASE: - if ( wrmsr_user(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) ) + if ( wrmsr_safe(MSR_SHADOW_GS_BASE, regs->eax, regs->edx) ) goto fail; v->arch.guest_context.gs_base_user = ((u64)regs->edx << 32) | regs->eax; break; #endif default: - if ( (rdmsr_user(regs->ecx, l, h) != 0) || + if ( (rdmsr_safe(regs->ecx, l, h) != 0) || (regs->ecx != MSR_EFER) || (regs->eax != l) || (regs->edx != h) ) DPRINTK("Domain attempted WRMSR %p from " @@ 
-1028,13 +1028,13 @@ static int emulate_privileged_op(struct cpu_user_regs *regs) break; #endif case MSR_EFER: - if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) ) + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) goto fail; break; default: DPRINTK("Domain attempted RDMSR %p.\n", _p(regs->ecx)); /* Everyone can read the MSR space. */ - if ( rdmsr_user(regs->ecx, regs->eax, regs->edx) ) + if ( rdmsr_safe(regs->ecx, regs->eax, regs->edx) ) goto fail; break; } diff --git a/xen/arch/x86/x86_32/traps.c b/xen/arch/x86/x86_32/traps.c index 16829ed499..3a12b0c161 100644 --- a/xen/arch/x86/x86_32/traps.c +++ b/xen/arch/x86/x86_32/traps.c @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -20,6 +21,7 @@ void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; + char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( HVM_DOMAIN(current) && GUEST_MODE(regs) ) @@ -49,6 +51,9 @@ void show_registers(struct cpu_user_regs *regs) fault_crs[3] = read_cr3(); } + printk("----[ Xen-%d.%d%s %s ]----\n", + XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION, + print_tainted(taint_str)); printk("CPU: %d\nEIP: %04x:[<%08x>]", smp_processor_id(), fault_regs.cs, fault_regs.eip); if ( !GUEST_MODE(regs) ) @@ -201,11 +206,11 @@ unsigned long do_iret(void) } BUILD_SMP_INTERRUPT(deferred_nmi, TRAP_deferred_nmi) -asmlinkage void smp_deferred_nmi(struct cpu_user_regs regs) +fastcall void smp_deferred_nmi(struct cpu_user_regs *regs) { asmlinkage void do_nmi(struct cpu_user_regs *); ack_APIC_irq(); - do_nmi(&regs); + do_nmi(regs); } void __init percpu_traps_init(void) diff --git a/xen/arch/x86/x86_64/mm.c b/xen/arch/x86/x86_64/mm.c index 01b54ef9ab..2d66253536 100644 --- a/xen/arch/x86/x86_64/mm.c +++ b/xen/arch/x86/x86_64/mm.c @@ -243,21 +243,21 @@ long do_set_segment_base(unsigned int which, unsigned long base) switch ( which ) { case SEGBASE_FS: - if ( wrmsr_user(MSR_FS_BASE, base, base>>32) ) + if (
wrmsr_safe(MSR_FS_BASE, base, base>>32) ) ret = -EFAULT; else v->arch.guest_context.fs_base = base; break; case SEGBASE_GS_USER: - if ( wrmsr_user(MSR_SHADOW_GS_BASE, base, base>>32) ) + if ( wrmsr_safe(MSR_SHADOW_GS_BASE, base, base>>32) ) ret = -EFAULT; else v->arch.guest_context.gs_base_user = base; break; case SEGBASE_GS_KERNEL: - if ( wrmsr_user(MSR_GS_BASE, base, base>>32) ) + if ( wrmsr_safe(MSR_GS_BASE, base, base>>32) ) ret = -EFAULT; else v->arch.guest_context.gs_base_kernel = base; diff --git a/xen/arch/x86/x86_64/traps.c b/xen/arch/x86/x86_64/traps.c index 8f66023b75..b4cad7ef85 100644 --- a/xen/arch/x86/x86_64/traps.c +++ b/xen/arch/x86/x86_64/traps.c @@ -1,5 +1,6 @@ #include +#include #include #include #include @@ -20,6 +21,7 @@ void show_registers(struct cpu_user_regs *regs) { struct cpu_user_regs fault_regs = *regs; unsigned long fault_crs[8]; + char taint_str[TAINT_STRING_MAX_LEN]; const char *context; if ( HVM_DOMAIN(current) && GUEST_MODE(regs) ) @@ -35,6 +37,9 @@ void show_registers(struct cpu_user_regs *regs) fault_crs[3] = read_cr3(); } + printk("----[ Xen-%d.%d%s %s ]----\n", + XEN_VERSION, XEN_SUBVERSION, XEN_EXTRAVERSION, + print_tainted(taint_str)); printk("CPU: %d\nRIP: %04x:[<%016lx>]", smp_processor_id(), fault_regs.cs, fault_regs.rip); if ( !GUEST_MODE(regs) ) diff --git a/xen/common/kernel.c b/xen/common/kernel.c index 3a4e420bc1..9d08fe0996 100644 --- a/xen/common/kernel.c +++ b/xen/common/kernel.c @@ -14,6 +14,8 @@ #include #include +int tainted; + void cmdline_parse(char *cmdline) { char opt[100], *optval, *p = cmdline, *q; @@ -78,6 +80,37 @@ void cmdline_parse(char *cmdline) } } +/** + * print_tainted - return a string to represent the kernel taint state. + * + * 'S' - SMP with CPUs not designed for SMP. + * 'M' - Machine had a machine check experience. + * 'B' - System has hit bad_page. + * + * The string is written into @str and overwritten by the next call to print_tainted() with the same buffer.
+ */ +char *print_tainted(char *str) +{ + if ( tainted ) + { + snprintf(str, TAINT_STRING_MAX_LEN, "Tainted: %c%c%c", + tainted & TAINT_UNSAFE_SMP ? 'S' : ' ', + tainted & TAINT_MACHINE_CHECK ? 'M' : ' ', + tainted & TAINT_BAD_PAGE ? 'B' : ' '); + } + else + { + snprintf(str, TAINT_STRING_MAX_LEN, "Not tainted"); + } + + return str; +} + +void add_taint(unsigned flag) +{ + tainted |= flag; +} + /* * Simple hypercalls. */ diff --git a/xen/include/asm-x86/bitops.h b/xen/include/asm-x86/bitops.h index 8bf54fcdc3..605a091da8 100644 --- a/xen/include/asm-x86/bitops.h +++ b/xen/include/asm-x86/bitops.h @@ -321,6 +321,58 @@ static __inline__ unsigned int find_first_set_bit(unsigned long word) return (unsigned int)word; } +/** + * ffz - find first zero in word. + * @word: The word to search + * + * Undefined if no zero exists, so code should check against ~0UL first. + */ +static inline unsigned long ffz(unsigned long word) +{ + __asm__("bsf %1,%0" + :"=r" (word) + :"r" (~word)); + return word; +} + +#define fls64(x) generic_fls64(x) + +/** + * ffs - find first bit set + * @x: the word to search + * + * This is defined the same way as + * the libc and compiler builtin ffs routines, therefore + * differs in spirit from the above ffz (man ffs). + */ +static inline int ffs(int x) +{ + int r; + + __asm__("bsfl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); + return r+1; +} + +/** + * fls - find last bit set + * @x: the word to search + * + * This is defined the same way as ffs. 
+ */ +static inline int fls(int x) +{ + int r; + + __asm__("bsrl %1,%0\n\t" + "jnz 1f\n\t" + "movl $-1,%0\n" + "1:" : "=r" (r) : "rm" (x)); + return r+1; +} + /** * hweightN - returns the hamming weight of a N-bit word * @x: the word to weigh diff --git a/xen/include/asm-x86/config.h b/xen/include/asm-x86/config.h index 27ae8c3a96..502a136fd8 100644 --- a/xen/include/asm-x86/config.h +++ b/xen/include/asm-x86/config.h @@ -23,6 +23,7 @@ #define CONFIG_X86_GOOD_APIC 1 #define CONFIG_X86_IO_APIC 1 #define CONFIG_HPET_TIMER 1 +#define CONFIG_X86_MCE_P4THERMAL 1 /* Intel P4 currently has largest cache line (L2 line size is 128 bytes). */ #define CONFIG_X86_L1_CACHE_SHIFT 7 diff --git a/xen/include/asm-x86/cpufeature.h b/xen/include/asm-x86/cpufeature.h index 938b765808..0a815ac6a7 100644 --- a/xen/include/asm-x86/cpufeature.h +++ b/xen/include/asm-x86/cpufeature.h @@ -69,6 +69,7 @@ #define X86_FEATURE_K7 (3*32+ 5) /* Athlon */ #define X86_FEATURE_P3 (3*32+ 6) /* P3 */ #define X86_FEATURE_P4 (3*32+ 7) /* P4 */ +#define X86_FEATURE_CONSTANT_TSC (3*32+ 8) /* TSC ticks at a constant rate */ /* Intel-defined CPU features, CPUID level 0x00000001 (ecx), word 4 */ #define X86_FEATURE_XMM3 (4*32+ 0) /* Streaming SIMD Extensions-3 */ @@ -88,9 +89,9 @@ #define X86_FEATURE_XCRYPT_EN (5*32+ 7) /* on-CPU crypto enabled */ /* More extended AMD flags: CPUID level 0x80000001, ecx, word 6 */ -#define X86_FEATURE_LAHF_LM (5*32+ 0) /* LAHF/SAHF in long mode */ -#define X86_FEATURE_CMP_LEGACY (5*32+ 1) /* If yes HyperThreading not valid */ -#define X86_FEATURE_SVME (5*32+ 2) /* Secure Virtual Machine */ +#define X86_FEATURE_LAHF_LM (6*32+ 0) /* LAHF/SAHF in long mode */ +#define X86_FEATURE_CMP_LEGACY (6*32+ 1) /* If yes HyperThreading not valid */ +#define X86_FEATURE_SVME (6*32+ 2) /* Secure Virtual Machine */ #define cpu_has(c, bit) test_bit(bit, (c)->x86_capability) #define boot_cpu_has(bit) test_bit(bit, boot_cpu_data.x86_capability) diff --git a/xen/include/asm-x86/msr.h 
b/xen/include/asm-x86/msr.h index 8b5ea950e1..932cfcabb5 100644 --- a/xen/include/asm-x86/msr.h +++ b/xen/include/asm-x86/msr.h @@ -28,34 +28,36 @@ static inline void wrmsrl(unsigned int msr, __u64 val) wrmsr(msr, lo, hi); } -#define rdmsr_user(msr,val1,val2) ({\ +/* rdmsr with exception handling */ +#define rdmsr_safe(msr,val1,val2) ({\ int _rc; \ __asm__ __volatile__( \ "1: rdmsr\n2:\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl $1,%2\n; jmp 2b\n" \ + "3: movl %5,%2\n; jmp 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " "__FIXUP_ALIGN"\n" \ " "__FIXUP_WORD" 1b,3b\n" \ ".previous\n" \ : "=a" (val1), "=d" (val2), "=&r" (_rc) \ - : "c" (msr), "2" (0)); \ + : "c" (msr), "2" (0), "i" (-EFAULT)); \ _rc; }) -#define wrmsr_user(msr,val1,val2) ({\ +/* wrmsr with exception handling */ +#define wrmsr_safe(msr,val1,val2) ({\ int _rc; \ __asm__ __volatile__( \ "1: wrmsr\n2:\n" \ ".section .fixup,\"ax\"\n" \ - "3: movl $1,%0\n; jmp 2b\n" \ + "3: movl %5,%0\n; jmp 2b\n" \ ".previous\n" \ ".section __ex_table,\"a\"\n" \ " "__FIXUP_ALIGN"\n" \ " "__FIXUP_WORD" 1b,3b\n" \ ".previous\n" \ : "=&r" (_rc) \ - : "c" (msr), "a" (val1), "d" (val2), "0" (0)); \ + : "c" (msr), "a" (val1), "d" (val2), "0" (0), "i" (-EFAULT)); \ _rc; }) #define rdtsc(low,high) \ diff --git a/xen/include/asm-x86/processor.h b/xen/include/asm-x86/processor.h index 5ad42cd765..2bd818d539 100644 --- a/xen/include/asm-x86/processor.h +++ b/xen/include/asm-x86/processor.h @@ -147,24 +147,28 @@ struct vcpu; #endif struct cpuinfo_x86 { - __u8 x86; /* CPU family */ - __u8 x86_vendor; /* CPU vendor */ - __u8 x86_model; - __u8 x86_mask; - char wp_works_ok; /* It doesn't on 386's */ - char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ - char hard_math; - char rfu; + __u8 x86; /* CPU family */ + __u8 x86_vendor; /* CPU vendor */ + __u8 x86_model; + __u8 x86_mask; + char wp_works_ok; /* It doesn't on 386's */ + char hlt_works_ok; /* Problems on some 486Dx4's and old 386's */ + char hard_math; + char 
rfu; int cpuid_level; /* Maximum supported CPUID level, -1=no CPUID */ - unsigned int x86_capability[NCAPINTS]; - char x86_vendor_id[16]; - char x86_model_id[64]; - int x86_cache_size; /* in KB - valid for CPUS which support this call */ - int x86_cache_alignment; /* In bytes */ - int fdiv_bug; - int f00f_bug; - int coma_bug; - unsigned char x86_num_cores; + unsigned int x86_capability[NCAPINTS]; + char x86_vendor_id[16]; + char x86_model_id[64]; + int x86_cache_size; /* in KB - valid for CPUS which support this call */ + int x86_cache_alignment; /* In bytes */ + char fdiv_bug; + char f00f_bug; + char coma_bug; + char pad0; + int x86_power; + unsigned char x86_max_cores; /* cpuid returned max cores value */ + unsigned char booted_cores; /* number of cores as seen by OS */ + unsigned char apicid; } __cacheline_aligned; /* @@ -208,6 +212,18 @@ static always_inline void detect_ht(struct cpuinfo_x86 *c) {} "=d" (*(int *)(_edx)) \ : "0" (_op), "2" (0)) +/* Some CPUID calls want 'count' to be placed in ecx */ +static inline void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, + int *edx) +{ + __asm__("cpuid" + : "=a" (*eax), + "=b" (*ebx), + "=c" (*ecx), + "=d" (*edx) + : "0" (op), "c" (count)); +} + /* * CPUID functions returning a single datum */ @@ -502,6 +518,11 @@ void show_registers(struct cpu_user_regs *regs); void show_page_walk(unsigned long addr); asmlinkage void fatal_trap(int trapnr, struct cpu_user_regs *regs); +extern void mtrr_ap_init(void); +extern void mtrr_bp_init(void); + +extern void mcheck_init(struct cpuinfo_x86 *c); + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_X86_PROCESSOR_H */ diff --git a/xen/include/asm-x86/x86_32/asm_defns.h b/xen/include/asm-x86/x86_32/asm_defns.h index c3772c6b1c..ca6733a397 100644 --- a/xen/include/asm-x86/x86_32/asm_defns.h +++ b/xen/include/asm-x86/x86_32/asm_defns.h @@ -59,17 +59,6 @@ asmlinkage void x(void); \ __asm__( \ "\n"__ALIGN_STR"\n" \ STR(x) ":\n\t" \ - "pushl $"#v"<<16\n\t" \ - STR(SAVE_ALL(a)) \ -
"call "STR(smp_##x)"\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) -#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ -asmlinkage void x(struct cpu_user_regs * regs); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -STR(x) ":\n\t" \ "pushl $"#v"<<16\n\t" \ STR(SAVE_ALL(a)) \ "movl %esp,%eax\n\t" \ @@ -78,6 +67,8 @@ STR(x) ":\n\t" \ "addl $4,%esp\n\t" \ "jmp ret_from_intr\n"); +#define BUILD_SMP_TIMER_INTERRUPT(x,v) BUILD_SMP_INTERRUPT(x,v) + #define BUILD_COMMON_IRQ() \ __asm__( \ "\n" __ALIGN_STR"\n" \ diff --git a/xen/include/asm-x86/x86_64/asm_defns.h b/xen/include/asm-x86/x86_64/asm_defns.h index d11af7228a..1d1c43e946 100644 --- a/xen/include/asm-x86/x86_64/asm_defns.h +++ b/xen/include/asm-x86/x86_64/asm_defns.h @@ -66,18 +66,6 @@ asmlinkage void x(void); \ __asm__( \ "\n"__ALIGN_STR"\n" \ STR(x) ":\n\t" \ - "pushq $0\n\t" \ - "movl $"#v",4(%rsp)\n\t" \ - STR(SAVE_ALL) \ - "callq "STR(smp_##x)"\n\t" \ - "jmp ret_from_intr\n"); - -#define BUILD_SMP_TIMER_INTERRUPT(x,v) XBUILD_SMP_TIMER_INTERRUPT(x,v) -#define XBUILD_SMP_TIMER_INTERRUPT(x,v) \ -asmlinkage void x(struct cpu_user_regs * regs); \ -__asm__( \ -"\n"__ALIGN_STR"\n" \ -STR(x) ":\n\t" \ "pushq $0\n\t" \ "movl $"#v",4(%rsp)\n\t" \ STR(SAVE_ALL) \ @@ -85,6 +73,8 @@ STR(x) ":\n\t" \ "callq "STR(smp_##x)"\n\t" \ "jmp ret_from_intr\n"); +#define BUILD_SMP_TIMER_INTERRUPT(x,v) BUILD_SMP_INTERRUPT(x,v) + #define BUILD_COMMON_IRQ() \ __asm__( \ "\n" __ALIGN_STR"\n" \ diff --git a/xen/include/xen/bitops.h b/xen/include/xen/bitops.h index e743c0059d..190d96baa7 100644 --- a/xen/include/xen/bitops.h +++ b/xen/include/xen/bitops.h @@ -76,6 +76,33 @@ static __inline__ int generic_fls(int x) */ #include + +static inline int generic_fls64(__u64 x) +{ + __u32 h = x >> 32; + if (h) + return fls(h) + 32; /* high word set: the top bit lives in h, not in the truncated x */ + return fls(x); +} + +static __inline__ int get_bitmask_order(unsigned int count) +{ + int order; + + order = fls(count); + return order; /* We could be slightly more clever
with -1 here... */ +} + +static __inline__ int get_count_order(unsigned int count) +{ + int order; + + order = fls(count) - 1; + if (count & (count - 1)) + order++; + return order; +} + /* * hweightN: returns the hamming weight (i.e. the number * of bits set) of a N-bit word @@ -126,4 +153,26 @@ static inline unsigned long hweight_long(unsigned long w) return sizeof(w) == 4 ? generic_hweight32(w) : generic_hweight64(w); } +/* + * rol32 - rotate a 32-bit value left + * + * @word: value to rotate + * @shift: bits to roll (taken modulo 32, so 0 leaves @word unchanged) + */ +static inline __u32 rol32(__u32 word, unsigned int shift) +{ + return (word << (shift & 31)) | (word >> ((-shift) & 31)); /* both counts stay in 0..31; the unmasked form shifted by 32 when shift == 0 */ +} + +/* + * ror32 - rotate a 32-bit value right + * + * @word: value to rotate + * @shift: bits to roll (taken modulo 32, so 0 leaves @word unchanged) + */ +static inline __u32 ror32(__u32 word, unsigned int shift) +{ + return (word >> (shift & 31)) | (word << ((-shift) & 31)); /* both counts stay in 0..31; the unmasked form shifted by 32 when shift == 0 */ +} + #endif diff --git a/xen/include/xen/init.h b/xen/include/xen/init.h index 0709c9abd8..0903cde903 100644 --- a/xen/include/xen/init.h +++ b/xen/include/xen/init.h @@ -100,4 +100,6 @@ extern struct kernel_param __setup_start, __setup_end; #define __devexitdata __exitdata #endif +#define fastcall + #endif /* _LINUX_INIT_H */ diff --git a/xen/include/xen/lib.h b/xen/include/xen/lib.h index 01440e1533..facfa2b093 100644 --- a/xen/include/xen/lib.h +++ b/xen/include/xen/lib.h @@ -75,4 +75,12 @@ unsigned long long simple_strtoull( unsigned long long parse_size_and_unit(char *s); +#define TAINT_UNSAFE_SMP (1<<0) +#define TAINT_MACHINE_CHECK (1<<1) +#define TAINT_BAD_PAGE (1<<2) +extern int tainted; +#define TAINT_STRING_MAX_LEN 20 +extern char *print_tainted(char *str); +extern void add_taint(unsigned); + #endif /* __LIB_H__ */ -- 2.30.2